{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Make sure this SageMakerNotebookExecutionRole has access to Kendra"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "import pandas as pd\n",
    "import sagemaker\n",
    "\n",
    "# SageMaker session, its default S3 bucket, and the execution role returned by the SDK.\n",
    "sess = sagemaker.Session()\n",
    "bucket = sess.default_bucket()\n",
    "role = sagemaker.get_execution_role()\n",
    "\n",
    "# Region comes from boto3's default session; the SageMaker client is pinned to it.\n",
    "region = boto3.Session().region_name\n",
    "sm = boto3.Session().client(region_name=region, service_name='sagemaker')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Kendra client used by the later cells; no region is passed, so boto3's default applies.\n",
    "kendra = boto3.client('kendra')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restore the variable persisted with %store (presumably by an earlier notebook - confirm).\n",
    "%store -r noheader_train_s3_uri\n",
    "\n",
    "print(noheader_train_s3_uri)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List what is stored at the restored S3 URI; IPython expands $noheader_train_s3_uri\n",
    "# before handing the command to the shell.\n",
    "!aws s3 ls $noheader_train_s3_uri"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Data Access Role for Kendra"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Policy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# assume_role_policy_doc = {\n",
    "#   \"Version\": \"2012-10-17\",\n",
    "#   \"Statement\": [\n",
    "#     {\n",
    "#       \"Effect\": \"Allow\",\n",
    "#       \"Principal\": {\n",
    "#         \"Service\": \"kendra.amazonaws.com\"\n",
    "#       },\n",
    "#       \"Action\": \"sts:AssumeRole\"\n",
    "#     }\n",
    "#   ]\n",
    "# } "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Role and Attach Policies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# iam_kendra_role_name = 'DSOAWS_Kendra'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import json\n",
    "# import boto3\n",
    "# from botocore.exceptions import ClientError\n",
    "\n",
    "# try:\n",
    "#     iam = boto3.client('iam')\n",
    "\n",
    "#     iam_role_kendra = iam.create_role(\n",
    "#         RoleName=iam_kendra_role_name,\n",
    "#         AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),\n",
    "#         Description='DSOAWS Kendra Role'\n",
    "#     )\n",
    "# except ClientError as e:\n",
    "#     if e.response['Error']['Code'] == 'EntityAlreadyExists':\n",
    "#         iam_role_kendra = iam.get_role(RoleName=iam_kendra_role_name)\n",
    "#         print(\"Role already exists\")\n",
    "#     else:\n",
    "#         print(\"Unexpected error: %s\" % e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# kendra_s3_policy_doc = {\n",
    "#     \"Version\": \"2012-10-17\",\n",
    "#     \"Statement\": [\n",
    "#         {\n",
    "#             \"Action\": [\n",
    "#                 \"s3:GetObject\"\n",
    "#             ],\n",
    "#             \"Resource\": [\n",
    "#                 \"arn:aws:s3:::{}/*\".format(bucket)\n",
    "#             ],\n",
    "#             \"Effect\": \"Allow\"\n",
    "#         },\n",
    "#         {\n",
    "#             \"Action\": [\n",
    "#                 \"s3:ListBucket\"\n",
    "#             ],\n",
    "#             \"Resource\": [\n",
    "#                 \"arn:aws:s3:::{}\".format(bucket)\n",
    "#             ],\n",
    "#             \"Effect\": \"Allow\"\n",
    "#         },\n",
    "#         {\n",
    "#             \"Action\": [\n",
    "#                 \"s3:PutObject\"\n",
    "#             ],\n",
    "#             \"Resource\": [\n",
    "#                 \"arn:aws:s3:::{}/*\".format(bucket)\n",
    "#             ],\n",
    "#             \"Effect\": \"Allow\"\n",
    "#         }\n",
    "#     ]\n",
    "# }\n",
    "\n",
    "# print(kendra_s3_policy_doc)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Attach Policy to Role"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# response = iam.put_role_policy(\n",
    "#     RoleName=iam_kendra_role_name,\n",
    "#     PolicyName='DSOAWS_KendraPolicyToS3',\n",
    "#     PolicyDocument=json.dumps(kendra_s3_policy_doc)\n",
    "# )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Add S3 Data Source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output location inside the session's default bucket: s3://<bucket>/kendra/output\n",
    "prefix = 'kendra'\n",
    "s3_output_job = f's3://{bucket}/{prefix}/output'\n",
    "\n",
    "print(s3_output_job)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Create an S3 data source\")\n",
    "\n",
    "# NOTE(review): the index ID and role ARN are hardcoded literals (the ARN embeds an AWS\n",
    "# account ID); presumably they must be replaced when running in another account - confirm.\n",
    "index_id = 'e9d93f01-5fd3-46ba-bc73-41fae0185d3a'\n",
    "kendra_role_arn = 'arn:aws:iam::835319576252:role/service-role/AmazonKendra-us-east-1-dsoaws'\n",
    "\n",
    "name = 'amazon-reviews'\n",
    "description = 'amazon-reviews'\n",
    "\n",
    "# The inclusion prefix is cut short on purpose: its length is limited.\n",
    "configuration = {\n",
    "    'S3Configuration': {\n",
    "        'BucketName': bucket,\n",
    "        'InclusionPrefixes': ['data/amazon_reviews_us_Digital_Software_v1_00_nohe'],\n",
    "    },\n",
    "}\n",
    "\n",
    "# Register the S3 bucket as a data source of the index.\n",
    "data_source_response = kendra.create_data_source(\n",
    "    IndexId=index_id,\n",
    "    Name=name,\n",
    "    Description=description,\n",
    "    Type='S3',\n",
    "    Configuration=configuration,\n",
    "    RoleArn=kendra_role_arn,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the raw create_data_source response.\n",
    "print(data_source_response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Wait for Kendra Data Source Creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "data_source_id = data_source_response['Id']\n",
    "\n",
    "# Poll every 30 seconds until the data source leaves the CREATING state.\n",
    "while True:\n",
    "    data_source_description = kendra.describe_data_source(\n",
    "        Id=data_source_id,\n",
    "        IndexId=index_id\n",
    "    )\n",
    "    status = data_source_description['Status']\n",
    "    print('Creating data source. Status: ' + status)\n",
    "    if status != 'CREATING':\n",
    "        break\n",
    "    time.sleep(30)\n",
    "\n",
    "# Fail loudly instead of letting the later sync and query cells run against a data source\n",
    "# that was never created.\n",
    "if status == 'FAILED':\n",
    "    raise RuntimeError(\n",
    "        'Kendra data source creation failed: '\n",
    "        + data_source_description.get('ErrorMessage', 'no error message returned')\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train the FAQ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# S3 object handed to create_faq as the FAQ source file.\n",
    "faq_path = dict(\n",
    "    Bucket=bucket,\n",
    "    Key='data/amazon_reviews_us_Digital_Software_v1_00_header.csv',\n",
    ")\n",
    "\n",
    "# The response keeps the name training_job because a later cell prints it under that name.\n",
    "training_job = kendra.create_faq(\n",
    "    IndexId=index_id,\n",
    "    Name='amazon-reviews-faq',\n",
    "    S3Path=faq_path,\n",
    "    RoleArn=kendra_role_arn,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Show the raw create_faq response.\n",
    "print(training_job)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Synchronize the data source.\")\n",
    "\n",
    "# Start a sync job for the data source whose ID was read from the creation response.\n",
    "sync_response = kendra.start_data_source_sync_job(IndexId=index_id, Id=data_source_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the raw start_data_source_sync_job response.\n",
    "print(sync_response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# _Please Wait Until the ^^ Data Source ^^ is Sync'd Above._"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search text sent to the index; the next cell formats the items of this response.\n",
    "query = '5'\n",
    "\n",
    "response = kendra.query(IndexId=index_id, QueryText=query)\n",
    "\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pretty-print every result item of the query response.\n",
    "print('\\nSearch results for query: ' + query + '\\n')\n",
    "for query_result in response['ResultItems']:\n",
    "    result_type = query_result['Type']\n",
    "    print('-------------------')\n",
    "    print('Type: ' + str(result_type))\n",
    "\n",
    "    # FAQ matches are returned as QUESTION_ANSWER; like ANSWER items, their text is in the\n",
    "    # document excerpt. Without this branch the FAQ created above would print no content.\n",
    "    if result_type in ('ANSWER', 'QUESTION_ANSWER'):\n",
    "        answer_text = query_result['DocumentExcerpt']['Text']\n",
    "        print(answer_text)\n",
    "\n",
    "    if result_type == 'DOCUMENT':\n",
    "        # The title is printed only when the item carries one.\n",
    "        if 'DocumentTitle' in query_result:\n",
    "            document_title = query_result['DocumentTitle']['Text']\n",
    "            print('Title: ' + document_title)\n",
    "        document_text = query_result['DocumentExcerpt']['Text']\n",
    "        print(document_text)\n",
    "    print('------------------\\n\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
